- (STPS) Smoke Test Possible Solution: the set of tentative errors that could be avoided by running smoke tests
import os
import pymongo
from pymongo import MongoClient
from datetime import date
# Data handling
# ==============================================================================
import numpy as np
import pandas as pd
import string
import re
# Plots
# ==============================================================================
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
# Plotly
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default='notebook'
# ==============================================================================
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
nltk.download('stopwords')
nltk.download('punkt') # first-time use only
nltk.download('wordnet') # first-time use only
from nltk.corpus import stopwords
import string
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import math
# Warnings configuration
# ==============================================================================
import warnings
[nltk_data] Downloading package stopwords to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package stopwords is already up-to-date! [nltk_data] Downloading package punkt to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package punkt is already up-to-date! [nltk_data] Downloading package wordnet to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package wordnet is already up-to-date!
# Smoke Test Parameters
# ==============================================================================
# Plotly configuration
plotly_template="plotly_dark"
# plotly_template="plotly"
# Filters:
JobsNameBlackList = ['test'] # Remove all jobs of analysis with this name
logsWhiteList = ["error","fail", "warning"] # Check lines of logs with this words
JobsStatusWhiteList = ["failed"] # Check jobs with this status
similarity = 0.6 # Cosine-similarity threshold used to group near-duplicate error texts
# DATA:
# mongoDbLimit=1000 # Limit of data request in mongodb
mongoDbLimit=False # Limit of data request in mongodb (False = no limit)
csvRead=True # When True, load jobs from the CSV below instead of MongoDB
csvSave=True # When True, dump the MongoDB result to a dated CSV
csvName="data-02-logsAnalysis-" # CSV file name prefix used when saving
csvFileRead="data-02-logsAnalysis13-08-2021.csv" # CSV file to load when csvRead is True
pathExperimentsFiles="/Users/ceciliocannavaciuolo/Documents/workspace/phd/experimentsGitlabColellector"
# Connect with DB
# Load the jobs data: either straight from MongoDB (optionally saving a
# dated CSV snapshot) or from a previously saved CSV file.
if not csvRead:
    MONGODB_URL = os.environ.get('MONGODB_URL')
    NODE_ENV = os.environ.get('NODE_ENV') or "dev"
    DB_NAME = os.environ.get('APP_NAME') + "-" + NODE_ENV
    # Bug fix: a throwaway `MongoClient()` (default localhost connection) was
    # created here and immediately replaced; connect once with the real URL.
    client = MongoClient(MONGODB_URL)
    db = client[DB_NAME]
    if mongoDbLimit:
        jobs = db.gitlablogs.find({}).limit(mongoDbLimit) # Read up to the configured limit
    else:
        print("@Note-01 ---- 2018711928 -----")
        jobs = db.gitlablogs.find({}) # Read all data
    jobs = pd.DataFrame(list(jobs)) # Convert to DataFrame
    print("List of data available inside of db structure")
    # Save CSV snapshot named with today's date
    if csvSave:
        today = date.today().strftime("%d-%m-%Y")
        jobs.to_csv(pathExperimentsFiles+'/dataAnalysis/'+csvName+today+'.csv', index = False)
else:
    print("@Note-01 ---- 1350489220 -----")
    # Bug fix: this branch previously rebuilt the file name from `csvName` and
    # an undefined `today` (NameError); use the configured csvFileRead instead.
    jobs = pd.read_csv(pathExperimentsFiles+'/dataAnalysis/'+csvFileRead)
jobs.dtypes
@Note-01 ---- 1350489220 -----
_id object jobId int64 projectId int64 __v int64 allow_failure bool commitId object commitMessage object commitTitle object committedEmail object created_at object duration float64 jobName object jobRef object jobStage object jobStatus object pipelineId int64 pipelineRef object pipelineStatus object pipelineUrl object pipelineWebUrl object projectDescriptions object projectName object projectNameWithNamespace object queued_duration float64 runnerDescription object runnerId float64 runnerIpAddress object runnerName object sha object started_at object username object jobLog object dtype: object
# Print a short summary report of the loaded jobs DataFrame.
print("------ DATA REPORT ------")
projectsNumber = len(jobs["projectName"].unique())
print("Number of Projects: "+ str(projectsNumber))
numberOfJobs = len(jobs.index)
print("Number of pipelines Jobs (Steps): "+ str(numberOfJobs))
statusCounts = jobs["jobStatus"].value_counts()
numberOfSuccess = statusCounts.get("success", 0)
numberOfFailed = statusCounts.get("failed", 0)
numberOfCancel = statusCounts.get("canceled", 0)
totalCounted = numberOfSuccess + numberOfFailed + numberOfCancel
# The original `(1 - ((total) - x) / total) * 100` is algebraically just
# `x / total * 100`; also guard against a division by zero on empty data.
successPercentage = (numberOfSuccess / totalCounted) * 100 if totalCounted else 0.0
failedPercentage = (numberOfFailed / totalCounted) * 100 if totalCounted else 0.0
canceledPercentage = (numberOfCancel / totalCounted) * 100 if totalCounted else 0.0
print("Number of success Jobs (Steps): "+ str(numberOfSuccess) + " or "+str(successPercentage) + " %")
print("Number of failed Jobs (Steps): "+ str(numberOfFailed)+ " or "+str(failedPercentage) + " %")
print("Number of canceled Jobs (Steps): "+ str(numberOfCancel)+ " or "+str(canceledPercentage) + " %")
------ DATA REPORT ------ Number of Projects: 8 Number of pipelines Jobs (Steps): 1000 Number of success Jobs (Steps): 612 or 63.816475495307614 % Number of failed Jobs (Steps): 51 or 5.318039624608972 % Number of canceled Jobs (Steps): 296 or 30.865484880083415 %
# Pie chart of job results per status.
# Dead code removed: `fig = make_subplots(rows=1, cols=2)` was immediately
# overwritten by the px.pie figure below and never used.
fig = px.pie(jobs, names='jobStatus', title='Pipelines Jobs results',color="jobStatus",template=plotly_template)
fig.show()
def createBarGraphByJobStatus(variable):
    """Plot a horizontal bar chart of job counts grouped by `variable`,
    with one series per job status.

    Relies on the module-level `jobs` DataFrame and `plotly_template`.
    Side effect only (fig.show()); returns None.
    """
    jobStatusUnique = jobs["jobStatus"].unique().tolist()
    df_list = []
    for status in jobStatusUnique:
        # Bug fix: the previous rename-based approach
        # (`jobs.rename(columns={'jobStatus': status})` then count) counted
        # EVERY row for EVERY status, so all series were identical.
        # Count only the rows that actually carry this status.
        per_status = (jobs[jobs["jobStatus"] == status]
                      .groupby(by=variable)["jobStatus"]
                      .count()
                      .rename(status))
        df_list.append(per_status)
    df = pd.concat(df_list, axis=1)
    df = df.sort_values(by=[jobStatusUnique[0]], ascending=True)
    fig = px.bar(df, orientation='h', template=plotly_template,
                 title="Number of fails by " + variable)
    fig.show()
# Render the per-status bar charts for several grouping columns.
print(" Number of jobs projectName types")
createBarGraphByJobStatus("projectName")
print(" Number of jobs stage types")
createBarGraphByJobStatus("jobStage")
createBarGraphByJobStatus("pipelineRef")
Number of jobs projectName types
Number of jobs stage types
# Get fragment of text with error
# ==============================================================================
def getErrorText(texto):
    """Extract the distinct, cleaned lines that mention "error" from a job log.

    Parameters
    ----------
    texto : str or any
        Raw job log text. Non-string input (e.g. NaN coming from pandas)
        yields an empty list.

    Returns
    -------
    list of str
        Unique, normalised error lines in first-occurrence order
        (possibly empty).
    """
    # Bug fix: previously a bare `except:` around .lower() returned "" for
    # non-string input, giving an inconsistent return type (str vs list).
    if not isinstance(texto, str):
        return []
    # Lowercase so the keyword match is case-insensitive.
    nuevo_texto = texto.lower()
    # Web page removal (tokens beginning with "http").
    nuevo_texto = re.sub(r'http\S+', ' ', nuevo_texto)
    lines = nuevo_texto.split(sep='\n')
    whiteList = ["error"]
    newTextList = []
    # Special characters stripped from matching lines.
    specials_regex = '[\\!\\"\\#\\>\\<\\$\\%\\&\\\'\\(\\)\\*\\+\\,\\;\\\\\]\\<\\=\\,\\>\\?\\:\\-\\|\\@\\@\\\\^_\\`\\{\\|\\}\\~]'
    # Hoisted out of the loop: the emoji pattern was recompiled per line.
    emoji_pattern = re.compile("["
                               u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               "]+", flags=re.UNICODE)
    for text in lines:
        for keyword in whiteList:
            if keyword in text:
                text = re.sub(specials_regex, ' ', text)
                text = re.sub(r'http\S+', ' ', text)
                # Remove numbers (this also destroys dates, so the old
                # follow-up \d{4}-\d{2}-\d{2} substitution was dead code).
                text = re.sub(r'\d+', ' ', text)
                text = re.sub(' +', ' ', text)       # collapse repeated spaces
                text = re.sub('- - t : :', '', text) # leftover timestamp skeleton
                text = emoji_pattern.sub(r'', text)  # remove emojis
                if text not in newTextList:
                    text = text.strip()
                    # Bug fix: the result of replace() was previously discarded.
                    text = text.replace('[', '').replace(']', '')
                    # Keep only the first sentence. Bug fix: when the line had
                    # no '.', find() returned -1 and silently chopped the last
                    # character (e.g. "health check error" -> "...erro").
                    dot = text.find(".")
                    if dot != -1:
                        text = text[:dot]
                    newTextList.append(text)
    # Drop blanks and very short tokens, then de-duplicate preserving order.
    newTextList = [s for s in newTextList if s.strip() and len(s) > 2]
    seen = set()
    newTextList = [s for s in newTextList if s not in seen and (seen.add(s) or True)]
    return newTextList
# Sanity check: this sample log contains "error" lines, so the extractor
# must return a non-empty list.
textExample = '''
* [new branch] frontend-test -> origin/frontend-test
* [new branch] functional-testing-junit -> origin/functional-testing-junit
* [new branch] master -> origin/master
* [new branch] sol-dev -> origin/sol-dev
* [new branch] sql-fix-branch -> origin/sql-fix-branch
* [new branch] testing-report -> origin/testing-report
* [new tag] CR-H1-2021-deploy-intermedio -> CR-H1-2021-deploy-intermedio
* [new tag] v1dffdfdfdf0_20190405 -df dff1dff0df0_20190405
* [new tag] v1dffdfdfdf1_20190424 -df dff1dff0df1_20190424
Checking out 18b927c2 as devdfdfdffdffdf
Skipping Git submodules setup
Downloading artifacts for build-jar (1473723797)dfdfdffdffdf
Downloading artifacts from coordinatordfdfdffdffdf ok id=1473723797 responseStatus=200 OK token=G4kf2uft
$ echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/confdfdfdfjson
$ /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/Dockerfile --destination $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
INFO[0001] Retrieving image manifest openjdk:11.0
INFO[0001] Retrieving image openjdk:11.0 from registry inddfdfdfdodfkdfrdfio
error building image: GET https://index.dockdfdfdfio/v2/library/openjdk/manifestdf/df1df0: TOOMANYREQUESTS: You have reached your pull ratedflidfitdf You may increase the limit by authenticating and upgrading: httdfs:/dfwdfwdfddfckerdfcom/increase-rate-limit
ERROR: Job failed: command terminated with exit code 1
'''
errorText = getErrorText(textExample)
# The result must be non-empty (different from 0).
assert len(errorText) > 0
print("✅ PASS TEST: OK 👍")
textExample = '''
>>>> 03_add_account_toEntity-06 : |'\n │ '|------------------------------------------------|'\n │ \n │ 'https://edutelling-api-develop.openshidfdfdftecdfgdfpdfit/a\n │ pi/v1/tutors/create/tutor-from-ambassador?ambassadorId\n │ ='\n │ 'RESPONSE'\n │ \n │ `{\"success\":true,\"message\":\"Tutor '#17:-2' cdfeadfeddf\",\"\n │ data\":{\"tutorId\":\"#17:-2\"}}`\n │ '|-**********************************************-|'\n │ \n └\n\n→ 04_login_no2_v3\n POST https://edutelling-apidfdevdflopddfopendfhdfftdftdfchgapdfit/api/v1/auth/authentication [200 OK, 798B, 137ms]\n ✓ [(POST)/api/v1/auth/authentication] Login Delete (200)\n\n→ 05_login_switch_how_account\n POST https://edutellidfg-api-dfedfelopdfodffenshiftdfftechgapdfit/api/v1/auth/complete-authedftication df200 OK, 1df01KB, 27ms]\n ✓ CHECK IF EXIST JWT\n ✓ CHECK IF EXIST jwtRefresh\n ✓ [(POST)/api/v1/auth/authentication] Login Delete (200)\n\n→ 01_entityTypeAccountService_getAll\n GET https://dfdutellingdfdfpi-dedfelodfdfopensdfiftdftechgapdfit/apdf/v1/accounts/dfmbassador@botdfcom/all [200 OK, 680B, 23ms]\n ┌\n │ 'deleteAccountId :', '#12:1564'\n │ 'deleteAccountId :', '12%3A1564'\n └\n\n→ 02_reamoveAccount\n DELETE dfttps://eddftelldfndf-api-dedfelopdfodfenshiftdftechgapdfit/api/v1/accounts/remove?dataId=12%3A1564 [200 OK, 397B, 15ms]\n\n→ goTo_workflowControl\n OPdfIONS httpdf://edutdfflling-apidfdevelopdffopenshiftdftechgapdfit [404 Not Found, 255B, 4ms]\n\nAttempting to set next request to controlOfWorkFlowTempalte_06\n\n→ controlOfWorkFlowTempalte_06dfn OPTIONdf https:df/edfutelling-dfpi-devedfopdfopenshiftdftechgapdfit [404 Not Found, 255B, 6ms]\n ┌\n │ ' ------ ------ ---------- ---------'\n │ \n │ '{\"nameOfConfigFile\":\"config_multiAccount_tutorHowAmba\n │ ssador\",\"templateRun\":[\"01_entityTypeAccountService_ge\n │ tAll_06\",\"config_multiAccount_studentHowAmbassador\"],\"\n │ executed\":[false,true]}'\n │ \n │ 'WORKFLOW GO TO: ------------------------'\n │ \n │ 'WORKFLOW CONTROL REDIRECTING TEMPATE TO: ', 
'config_m\n │ ultiAccount_studentHowAmbassador'\n │ \n │ \n └\n\nAttempting to set next request to config_multiAccount_studentHowAmbassador\n\n→ config_multiAccount_studentHowAmbadfsador\n dfPTIONS dfttps:df/edutellidfg-api-ddfvelopdfopenshiftdftechgapdfit [404 Not Found, 255B, 5ms]\n\nAttempting to set next request to controlOfWorkFlowTempalte_06\n\n→ controlOfWorkFlodfTempalte_df6\n OPdfIONS httdfs://edutedfling-apdf-developdfopenshiftdftechgapdfit [404 Not Found, 255B, 8ms]\n ┌\n │ ' ------ ------ ---------- ---------'\n │ \n │ '{\"nameOfConfigFile\":\"config_multiAccount_tutorHowAmba\n │ ssador\",\"templateRun\":[\"01_entityTypeAccountService_ge\n │ tAll_06\",\"stop_06_createCourseModuleAndStage\"]}'\n │ \n │ 'WORKFLOW GO TO: ------------------------'\n │ \n │ 'WORKFLOW CONTROL REDIRECTING TEMPATE TO: ', '01_entit\n │ yTypeAccountService_getAll_06'\n │ \n │ \n └\n\nAttempting to set next request to 01_entityTypeAccountService_getAll_06\n\n→ 01_entityTypeAcdfountServidfe_getAldf_06\n GET dfttps://eddftellindfdfapi-developdfopenshiftdftechgadfdfit/api/v1/accounts/student@botdfcom/all [200 OK, 475B, 8ms]\n ┌\n │ 'entityTypeUppperCase: ', 'Student'\n │ '20%3A273'\n df\n\n→ 02_dfeamoveAdfcount_06\n DELdfTE https:df/eduteldfing-api-developdfopenshiftdftechgapdfit/api/v1/accounts/remove?dataId=12%3A1564 [200 OK, 379B, 53dfs]\n\n→ 0df_add_acdfount_toEntity-06\ndf POST httdfs://edudfelling-api-developdfopenshiftdftechgapdfit/api/v1/ambassadors/create/ambassador-from-student?studentId=20%3A273 [200 OK, 420B, 84ms]\n ✓ [(POST) /api/v1/teachers/create/teacher-from-student?studentId ] Status code is 200\n ✓ Check if was Success to add access student how teacher\n ┌\n │ \n │ '|------------------------------------------------|'\n │ '| >>>> 03_add_account_toEntity-06 : |'\n │ '|-------------df---------df-------df----------------|'\n df│ \n │ 'dfttps://dfdutelling-api-developdfopenshiftdftechgapdfit/a\n │ pi/v1/ambassadors/create/ambassador-from-student?stude\n │ ntId='\n │ 
'RdfSPONSE'\n │ \n │ `{\"success\":true,\"medfsage\":\"Ambassador '#19:-2' create\n │ ddf\",\"data\":{\"ambassadorId\":\"#19:-2\"}}`\n │ '|-********************************df*********df***-|'\df │ \n └\n\n→ 04_login_ndf2_v3\n PdfST httpdf://edutelling-api-developdfopenshiftdftechgapdfit/api/v1/auth/authentication [200 OK, 790B, 152ms]\n ✓ [(POST)/api/v1/auth/authenticatidfn] Login dfelete (df00)\n\n→ 05_login_switch_howdfaccount\ndf POST hddftps://edutelling-api-developdfopenshiftdftechgapddfit/api/v1/auth/complete-authentication [200 OK, 1df03KB, 23ms]\n ✓ CHECK IF EXIST JWT\n ✓ CHECK IF EXIST jwtRefresh\n ✓ [(POST)/api/v1/auth/authenticadfion] Logidf Deletedf(200)\n\n→ 01_entityTypeAccoundfSdfrvice_getdfll\n GdfT https://edutelling-api-develdfpdfopenshiftdftechgapdfit/api/v1/accounts/student@botdfcom/all [200 OK, 676B, 9ms]\n ┌\n │ 'deleteAccountId :', '#12:15df5'\n │ 'dfeleteAcdfountId :', '12%3A1565'\n └\n\n→ 02_dfeamoveAccdfunt\n dfELETE https://edutelling-api-developdfopenshiftdftechgapdfit/api/v1/accoudfts/removedfdataId=df2%3A1565 [200 OK, 397B, 37ms]\n\n→ goTodfworkflowCdfntrol\ndf OPTIONS https://edutelling-api-developdfopenshiftdftechgapdfit [404 Not Found, 255B, 6ms]\n\nAttempting to sedf next reqdfest to dfontrolOfWorkFlowTempalte_06\n\n→ controlOfdforkFlowTedfpalte_0df\n OPTIONS https://edutelling-api-developdfopenshiftdftechgapdfit [404 Not Found, 255B, 4ms]\n ┌\n │ ' ------ ------ ---------- ---------'\n │ \n │ '{\"nameOfConfigFile\":\"config_multiAccount_tutorHowAmba\n │ ssador\",\"templateRun\":[\"01_entityTypeAccountService_ge\n │ tAll_06\",\"stop_06_createCourseModuleAndStage\"],\"execut\n │ ed\":[false,true]}'\n │ \n │ 'WORKFLOW GO TO: ------------------------'\n │ \n │ 'WORKFLOW CONTROL REDIRECTING TEMPATE TO: ', 'stop_06_\n │ createCourseModuleAndStage'\n │ \n │ \n └\n\nAttempting to set nextdfrequest tdf stop_0df_createCourseModuleAndStage\n\n→ stop_06_creadfeCourseModfuleAndSdfage\n OPTIONS 
httpdf://edutelling-api-developdfopenshiftdftechgapdfit [404 Not Found, df55B, 5ms]\nsummary: 0\nNUMBER OF FAILS 0\ncollection run completeddf\n\n┌─────────────────────────┬───────────────────┬──────────────────┐\n│ │ executed │ failed │\n├─────────────────────────┼───────────────────┼──────────────────┤\n│ iterations │ 1 │ 0 │\n├─────────────────────────┼───────────────────┼──────────────────┤\n│ requests │ 425 │ 0 │\n├─────────────────────────┼───────────────────┼──────────────────┤\n│ test-scripts │ 425 │ 0 │\n├─────────────────────────┼───────────────────┼──────────────────┤\n│ prerequest-scripts │ 3 │ 0 │\n├─────────────────────────┼───────────────────┼──────────────────┤\n│ assertions │df 247 │ 0 │\n├─────────────────────────┴df──────────────────┴──────────────────┤\n│ total run duration: 4m df9df3s │\n├────────────────────────df───────────────────────────────────────┤\n│ total data received: 755df83KB (approx) df df │\n├───────────────────────────────────────────────────df─df──────────┤\n│ avedfagedfresponse time: 44ms [min: 3ms, max: 605ms, sdfddf: 58ms] │\n└──df───df───ddf───────df─────────────────────────────────────────────┘\nDone indfdf60df84sdddf\n\u001dff[32;1m$ echo 'http://edutelling-functional-test-backenddfdfpenshiftddftechgapdffit/'\u001b[0;m\nhttp://edutelling-functional-test-backenddfopenshiftdftechgapdfit/\nsection_end:1626336539:step_script\r\u001b[0Ksection_start:1626336539:cleanup_file_variables\r\u001b[0K\u001b[0K\u001b[36;1mCleaning up file based variables\u001b[0;m\n\u001b[0;msection_end:1626336540:cleanup_file_variables\r\u001b[0K\u001b[32;1mJob succeeded\n\u001b[0;m
'''
# Negative sanity check: the sample log above contains no "error" lines,
# so the extractor must return an empty list.
errorText = getErrorText(textExample)
print(errorText)
assert len(errorText) == 0
print("✅ PASS TEST: OK 👍")
✅ PASS TEST: OK 👍 [] ✅ PASS TEST: OK 👍
# The cleaning and tokenization function is applied to each job
# ==============================================================================
df = jobs
# Extract the cleaned error lines from every job log (NaN logs yield []).
df['jobLog_token'] = df['jobLog'].apply(getErrorText)
# Drop rows whose token list is empty. Bug fix: the original condition was
# `(a != 0) | (a != 0)` — the same test OR'ed with itself, which is redundant.
df = df[df['jobLog_token'].str.len() != 0]
df[['jobLog', 'jobLog_token']].head(10)
| jobLog | jobLog_token | |
|---|---|---|
| 0 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check erro] |
| 1 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check erro] |
| 2 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check erro, typeerror cannot read prop... |
| 3 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check erro] |
| 4 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check erro] |
| 5 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check erro] |
| 6 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check erro] |
| 7 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check erro] |
| 8 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check erro, typeerror cannot read prop... |
| 9 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check erro] |
# Obtaining a list of stopwords in English
# ==============================================================================
stop_words = list(stopwords.words('english'))
# Add the extra stopwords: amp, xa, xe
stop_words.extend(("amp", "xa", "xe"))
print(stop_words[:10])
['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're"]
# Unnest the tokenized-text column: one row per extracted error token
# ==============================================================================
jobs_tidy = df.explode(column='jobLog_token')
jobs_tidy = jobs_tidy.drop(columns='jobLog')
jobs_tidy = jobs_tidy.rename(columns={'jobLog_token':'token'})
jobs_tidy.head(3)
| _id | jobId | projectId | __v | allow_failure | commitId | commitMessage | commitTitle | committedEmail | created_at | ... | projectNameWithNamespace | queued_duration | runnerDescription | runnerId | runnerIpAddress | runnerName | sha | started_at | username | token | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 611102da01a14071451c22c7 | 1452810390 | 15112024 | 0 | False | fa3cb44c5c76f4955d7b63f6159bc17916c4a29b | Merge branch '226-A-getStagesByTeacherId' into... | Merge branch '226-A-getStagesByTeacherId' into... | c.cannavacciuolo@anoki.it | 2021-07-26 15:10:20.281 | ... | Anoki S.r.l. / EDUtelling-Prj / edutelling-api | 0.257544 | shared-runners-manager-7 | NaN | 35.237.21.158 | gitlab-runner | fa3cb44c5c76f4955d7b63f6159bc17916c4a29b | 2021-07-26 15:23:34.058 | cecilio.cannav | health check erro |
| 1 | 6111047501a14071451c2354 | 1452810387 | 15112024 | 0 | False | fa3cb44c5c76f4955d7b63f6159bc17916c4a29b | Merge branch '226-A-getStagesByTeacherId' into... | Merge branch '226-A-getStagesByTeacherId' into... | c.cannavacciuolo@anoki.it | 2021-07-26 15:10:20.251 | ... | Anoki S.r.l. / EDUtelling-Prj / edutelling-api | 3.885320 | shared-runners-manager-3.gitlab.com | NaN | 104.196.48.2 | gitlab-runner | fa3cb44c5c76f4955d7b63f6159bc17916c4a29b | 2021-07-26 15:18:18.317 | cecilio.cannav | health check erro |
| 2 | 6111047501a14071451c2356 | 1452810384 | 15112024 | 0 | True | fa3cb44c5c76f4955d7b63f6159bc17916c4a29b | Merge branch '226-A-getStagesByTeacherId' into... | Merge branch '226-A-getStagesByTeacherId' into... | c.cannavacciuolo@anoki.it | 2021-07-26 15:10:20.225 | ... | Anoki S.r.l. / EDUtelling-Prj / edutelling-api | 0.132675 | shared-runners-manager-7 | NaN | 35.237.21.158 | gitlab-runner | fa3cb44c5c76f4955d7b63f6159bc17916c4a29b | 2021-07-26 15:15:53.474 | cecilio.cannav | health check erro |
3 rows × 32 columns
# Token counts broken down by job status and by project.
jobs_tidy.groupby(by='jobStatus')['token'].count()
jobs_tidy.groupby(by='projectName')['token'].count()
# Number of DISTINCT tokens per event (nunique, not a raw count)
# ==============================================================================
print('--------------------------')
print('Total words per event')
print('--------------------------')
jobs_tidy.groupby(by='jobStatus')['token'].nunique()
-------------------------- Total words per event --------------------------
jobStatus canceled 7 failed 44 success 31 Name: token, dtype: int64
# Mean and standard deviation of tokens-per-job for each jobStatus
# ==============================================================================
temp_df = pd.DataFrame(jobs_tidy.groupby(by = ["jobStatus", "jobId"])["token"].count())
temp_df.reset_index().groupby("jobStatus")["token"].agg(['mean', 'std'])
| mean | std | |
|---|---|---|
| jobStatus | ||
| canceled | 1.250000 | 0.645497 |
| failed | 3.862745 | 2.522060 |
| success | 1.412844 | 0.793019 |
# Top 10 most frequent token/commit/stage/name combinations per job status
# (note: the code takes head(10), not 50)
# ==============================================================================
jobs_tidy_text = jobs_tidy.groupby(['jobStatus','token','commitMessage', 'jobStage', 'jobName'])['token'] \
.count() \
.reset_index(name='count') \
.groupby('jobStatus') \
.apply(lambda x: x.sort_values('count', ascending=False).head(10))
jobs_tidy_text
| jobStatus | token | commitMessage | jobStage | jobName | count | ||
|---|---|---|---|---|---|---|---|
| jobStatus | |||||||
| canceled | 17 | canceled | health check erro | Update .gitlab-ci.yml | package | docker-build | 2 |
| 25 | canceled | health check erro | fix error in app.js\n | package | docker-build | 1 | |
| 19 | canceled | health check erro | add step for check cluster conditions | functional-test | test-develop | 1 | |
| 20 | canceled | health check erro | amended .gitlab-ci.yml\n | package | docker-build | 1 | |
| 21 | canceled | health check erro | check smoke test | build | checkCluster | 1 | |
| 22 | canceled | health check erro | check smoke test | build | compile-java | 1 | |
| 23 | canceled | health check erro | check smoke test | build | logsStatus | 1 | |
| 24 | canceled | health check erro | check smoke test | build | podsUp | 1 | |
| 26 | canceled | health check erro | fix test | package | package-develop | 1 | |
| 1 | canceled | [info error stacktraces are turned on | Update .gitlab-ci.yml | build | compile-java | 1 | |
| failed | 151 | failed | fullyqualifiederrorid commandnotfoundexceptio | Update .gitlab-ci.yml | cloneProject | cloneProject | 5 |
| 69 | failed | [ msection end cleanup file variables\r[ k[... | Update .gitlab-ci.yml | cloneProject | cloneProject | 5 | |
| 34 | failed | [ merro[ m[ error while retrieving image fro... | changed conditional for enrollment dialog\n | package | package-prod | 1 | |
| 154 | failed | health check erro | Add cases of the smoke test, check dependencie... | package | check-cluster | 1 | |
| 156 | failed | health check erro | Allineamento staging to master\n | package | package-staging | 1 | |
| 157 | failed | health check erro | AutomaticTest- Fix\n | functional-test | test-develop | 1 | |
| 158 | failed | health check erro | Fix bug\n | functional-test | test-develop | 1 | |
| 159 | failed | health check erro | Fix, conflict\n | functional-test | test-develop | 1 | |
| 160 | failed | health check erro | Merge branch '185-bulk-companies' into 'develo... | functional-test | test-develop | 1 | |
| 161 | failed | health check erro | Merge branch '226-A-getStagesByTeacherId' into... | smoke-tests | SmokeTests | 1 | |
| success | 499 | success | health check erro | Merge branch '344-projectqueryfragments-500-er... | package | docker-build | 6 |
| 490 | success | health check erro | Merge branch '342-add-an-image-to-project-desc... | package | docker-build | 6 | |
| 523 | success | health check erro | Merge branch 'develop' into sidip\n | package | docker-build | 6 | |
| 521 | success | health check erro | Merge branch 'develop' into sidip\n | deploy | deploy-sidip | 5 | |
| 418 | success | health check erro | Merge C1-S21 and C2-S20, Important! Is necessa... | package | docker-build | 3 | |
| 829 | success | tlmgr action install returned an error continuing | Merge branch 'develop' into sidip\n | package | docker-build | 3 | |
| 851 | success | tlmgr an error has occurred | Merge branch 'develop' into sidip\n | package | docker-build | 3 | |
| 598 | success | health check erro | Update .gitlab-ci.yml | deploy | deploy-develop | 3 | |
| 874 | success | warning illegal reflective access by com | Merge branch '23-fix-report-device-in-repair' ... | build | compile-java | 2 | |
| 259 | success | [info error stacktraces are turned on | Merge branch '23-fix-report-device-in-repair' ... | build | compile-java | 2 |
# Clean text and apply filters
# ==============================================================================
def getTextFilter(textList):
    """Lowercase the given texts, strip punctuation/URLs and stopwords,
    and return the surviving unique tokens as one comma-separated string.

    Uses the module-level `stop_words` list. Order of first occurrence
    is preserved both for the input texts and the output tokens.
    """
    specials = '[\\!\\"\\#\\>\\<\\$\\%\\&\\\'\\(\\)\\*\\+\\,\\;\\\\\]\\<\\=\\,\\>\\?\\:\\-\\|\\@\\@\\\\^_\\`\\{\\|\\\\}\\~]'
    # De-duplicate the incoming texts while keeping their original order.
    seen_texts = set()
    unique_texts = [t for t in textList
                    if t not in seen_texts and (seen_texts.add(t) or True)]
    tokens = []
    for raw in unique_texts:
        cleaned = re.sub(specials, ' ', raw.lower())
        cleaned = re.sub('http\S+', ' ', cleaned)
        # Flatten internal newlines, then re-append one terminator.
        cleaned = cleaned.replace("\n", "") + "\n"
        for piece in cleaned.split(sep=' '):
            # NOTE: the stopword test runs BEFORE the trailing newline is
            # stripped, so a final token like "the\n" never matches a
            # stopword — this reproduces the original behaviour exactly.
            if piece not in stop_words:
                tokens.append(piece.replace('\n', ''))
    # De-duplicate the tokens, again preserving first-occurrence order.
    seen_tokens = set()
    tokens = [t for t in tokens
              if t not in seen_tokens and (seen_tokens.add(t) or True)]
    return ', '.join(tokens)
# Demo: run a few real commit messages through the filter.
textList = ['Merge branch \'344-projectqueryfragments-500-error\' into \'develop\'\n\nResolve "projectQueryFragments 500 error"\n\nCloses #344\n\nSee merge request tech-gap-italia/ckp/ckp-api!247', 'Merge branch \'342-add-an-image-to-project-description\' into \'develop\'\n\nResolve "Add an image to project description"\n\nCloses #342\n\nSee merge request tech-gap-italia/ckp/ckp-api!245', "Merge branch 'develop' into sidip\n", 'Merge C1-S21 and C2-S20, Important! Is necessary update the db with 0303 and 0304\n', 'Update .gitlab-ci.yml', 'Merge branch \'23-fix-report-device-in-repair\' into \'dev\'\n\nResolve "Fix report device in repair"\n\nCloses #23\n\nSee merge request tech-gap-italia/pax-italia-pot/pax-italia-pot-api!36']
print(" --- --- Before applying the filter --- ---")
print(textList)
textOut = getTextFilter(textList)
print(" --- --- After applying the filter --- ---")
print(textOut)
--- --- Before applying the filter --- --- ['Merge branch \'344-projectqueryfragments-500-error\' into \'develop\'\n\nResolve "projectQueryFragments 500 error"\n\nCloses #344\n\nSee merge request tech-gap-italia/ckp/ckp-api!247', 'Merge branch \'342-add-an-image-to-project-description\' into \'develop\'\n\nResolve "Add an image to project description"\n\nCloses #342\n\nSee merge request tech-gap-italia/ckp/ckp-api!245', "Merge branch 'develop' into sidip\n", 'Merge C1-S21 and C2-S20, Important! Is necessary update the db with 0303 and 0304\n', 'Update .gitlab-ci.yml', 'Merge branch \'23-fix-report-device-in-repair\' into \'dev\'\n\nResolve "Fix report device in repair"\n\nCloses #23\n\nSee merge request tech-gap-italia/pax-italia-pot/pax-italia-pot-api!36'] --- --- After applying the filter --- --- merge, branch, , 344, projectqueryfragments, 500, error, develop, resolve, closes, 344see, request, tech, gap, italia/ckp/ckp, api, 247, 342, add, image, project, description, 342see, 245, sidip, c1, s21, c2, s20, important, necessary, update, db, 0303, 0304, .gitlab, ci.yml, 23, fix, report, device, repair, dev, 23see, italia/pax, italia, pot/pax, pot, 36
# Collect data
# https://sites.temple.edu/tudsc/2017/03/30/measuring-similarity-between-texts-in-python/
#================================================================================
def StemTokens(tokens):
    # NOTE(review): `stemmer` is never defined anywhere in this file, so any
    # call (via StemNormalize) raises NameError — confirm whether a
    # PorterStemmer was meant to be created, or whether this is dead code.
    return [stemmer.stem(token) for token in tokens]
# Translation table mapping every punctuation character to None (i.e. delete).
remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)
def StemNormalize(text):
    # Lowercase, strip punctuation, tokenize, then stem each token.
    return StemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict)))
nltk.download('wordnet') # first-time use only (already downloaded at the top of the file)
lemmer = nltk.stem.WordNetLemmatizer()
def LemTokens(tokens):
    # Lemmatize each token with the module-level WordNet lemmatizer.
    return [lemmer.lemmatize(token) for token in tokens]
# NOTE(review): duplicate of the remove_punct_dict definition above.
remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)
def LemNormalize(text):
    # Lowercase, strip punctuation, tokenize, then lemmatize each token.
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict)))
# Vectorizer used for the TF-IDF similarity grouping below.
LemVectorizer = CountVectorizer(tokenizer=LemNormalize, stop_words='english')
def idf(n, df):
    """Smoothed inverse document frequency: log((n + 1) / (df + 1)) + 1."""
    ratio = (n + 1.0) / (df + 1.0)
    return math.log(ratio) + 1
def groupDataFrame(jobs_tidy_text, jobStatusUnique, similarity):
    """Group near-duplicate tokens per job status via TF-IDF cosine similarity.

    For each status in ``jobStatusUnique``, the tokens of the matching rows
    are vectorized (lemmatized bag-of-words), transformed to L2-normalized
    TF-IDF, and pairwise cosine similarities are computed. Each token is
    replaced by the last token whose similarity exceeds ``similarity``, and
    its count is the sum of the counts of all matching tokens. Duplicate
    result rows are dropped.

    Parameters:
        jobs_tidy_text: DataFrame with columns 'jobStatus', 'token', 'count',
            'commitMessage', 'jobStage', 'jobName'.
        jobStatusUnique: list of status values to process.
        similarity: cosine-similarity threshold in [0, 1].

    Returns:
        DataFrame with columns 'jobStatus', 'token', 'count',
        'commitMessage', 'jobStage', 'jobName' (empty if no status had rows).
    """
    columns = ['jobStatus', 'token', 'count', 'commitMessage', 'jobStage', 'jobName']
    frames = []
    LemVectorizer = CountVectorizer(tokenizer=LemNormalize, stop_words='english')
    for status in jobStatusUnique:
        jobs_temp = jobs_tidy_text[jobs_tidy_text["jobStatus"] == status]
        documents = jobs_temp['token'].to_list()
        countX = jobs_temp['count'].to_list()
        if not documents:
            continue
        LemVectorizer.fit_transform(documents)
        tf_matrix = LemVectorizer.transform(documents).toarray()
        tfidfTran = TfidfTransformer(norm="l2")
        tfidfTran.fit(tf_matrix)
        tfidf_matrix = tfidfTran.transform(tf_matrix)
        cos_similarity_matrix = (tfidf_matrix * tfidf_matrix.T).toarray()
        # Invariant per status — the original recomputed these inside the
        # innermost loop on every similarity hit.
        commitMessage = getTextFilter(jobs_temp['commitMessage'].to_list())
        jobStage = getTextFilter(jobs_temp['jobStage'].to_list())
        jobName = getTextFilter(jobs_temp['jobName'].to_list())
        # BUG FIX: `data` is reset per status. The original initialized it
        # once before the loop, so later statuses accumulated earlier rows.
        data = []
        for i in range(len(cos_similarity_matrix)):
            countData = 0
            # BUG FIX: initialize `token` so it cannot be unbound when no
            # pair exceeds the threshold (the original could raise NameError
            # or silently reuse the previous iteration's token).
            token = documents[i]
            for i2 in range(len(cos_similarity_matrix)):
                if cos_similarity_matrix[i, i2] > similarity:
                    token = documents[i2]
                    countData = countData + countX[i2]
            data.append([status, token, countData, commitMessage, jobStage, jobName])
        data = list(map(list, set(map(tuple, data))))  # drop duplicate rows
        frames.append(pd.DataFrame(data, columns=columns))
    if not frames:
        # BUG FIX: the original raised NameError (df_STPS unbound) when no
        # status produced any rows.
        return pd.DataFrame(columns=columns)
    # BUG FIX: the original called `df_STPS.append(df_stps)` and discarded
    # the return value — DataFrame.append is not in-place (and was removed
    # in pandas 2.x), so only the first status's frame was ever returned.
    return pd.concat(frames, ignore_index=True)
# LemVectorizer
# Build the STPS dataframe: one row per representative token, grouping
# near-duplicate tokens within each job status by cosine similarity.
stps_columns = ['jobStatus', 'token', 'count', 'commitMessage', 'jobStage', 'jobName']
df_stps = groupDataFrame(
    pd.DataFrame(jobs_tidy_text, columns=stps_columns),
    jobs["jobStatus"].unique().tolist(),
    similarity,
)
print("=======================================")
print("Text related to a similarity of:")
print("=======================================")
df_stps
======================================= Text related to a similarity of: =======================================
[nltk_data] Downloading package wordnet to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package wordnet is already up-to-date! /usr/local/Caskroom/miniforge/base/lib/python3.9/site-packages/sklearn/feature_extraction/text.py:388: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ha', 'le', 'u', 'wa'] not in stop_words.
| jobStatus | token | count | commitMessage | jobStage | jobName | |
|---|---|---|---|---|---|---|
| 0 | success | warning illegal reflective access by com | 2 | merge, branch, , 344, projectqueryfragments, 5... | package, deploy, build | docker, build, deploy, sidip, develop, compile... |
| 1 | canceled | [info error stacktraces are turned on | 1 | update, .gitlab, ci.yml, fix, error, app.js, a... | package, functional, test, build | docker, build, test, develop, checkcluster, co... |
| 2 | success | health check erro | 29 | merge, branch, , 344, projectqueryfragments, 5... | package, deploy, build | docker, build, deploy, sidip, develop, compile... |
| 3 | success | tlmgr action install returned an error continuing | 3 | merge, branch, , 344, projectqueryfragments, 5... | package, deploy, build | docker, build, deploy, sidip, develop, compile... |
| 4 | success | tlmgr an error has occurred | 3 | merge, branch, , 344, projectqueryfragments, 5... | package, deploy, build | docker, build, deploy, sidip, develop, compile... |
| 5 | canceled | health check erro | 10 | update, .gitlab, ci.yml, fix, error, app.js, a... | package, functional, test, build | docker, build, test, develop, checkcluster, co... |
| 6 | failed | [ msection end cleanup file variables\r[ k[... | 5 | update, .gitlab, ci.yml, changed, conditional,... | cloneproject, package, functional, test, smoke... | cloneproject, package, prod, check, cluster, s... |
| 7 | failed | health check erro | 7 | update, .gitlab, ci.yml, changed, conditional,... | cloneproject, package, functional, test, smoke... | cloneproject, package, prod, check, cluster, s... |
| 8 | success | [info error stacktraces are turned on | 2 | merge, branch, , 344, projectqueryfragments, 5... | package, deploy, build | docker, build, deploy, sidip, develop, compile... |
| 9 | failed | [ merro[ m[ error while retrieving image fro... | 1 | update, .gitlab, ci.yml, changed, conditional,... | cloneproject, package, functional, test, smoke... | cloneproject, package, prod, check, cluster, s... |
| 10 | failed | fullyqualifiederrorid commandnotfoundexceptio | 5 | update, .gitlab, ci.yml, changed, conditional,... | cloneproject, package, functional, test, smoke... | cloneproject, package, prod, check, cluster, s... |
# (Notebook re-run cell) Re-create the WordNet lemmatization helpers —
# duplicates the earlier definitions; kept to preserve cell behavior.
nltk.download('wordnet') # first-time use only
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    # Apply WordNet lemmatization token by token.
    return [lemmer.lemmatize(t) for t in tokens]

# Map every punctuation character to None so str.translate removes it.
remove_punct_dict = {ord(p): None for p in string.punctuation}

def LemNormalize(text):
    # Normalize: lowercase -> strip punctuation -> tokenize -> lemmatize.
    lowered = text.lower().translate(remove_punct_dict)
    return LemTokens(nltk.word_tokenize(lowered))

# Show exactly which characters the punctuation filter removes.
print(string.punctuation)
!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
[nltk_data] Downloading package wordnet to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package wordnet is already up-to-date!
# Order the dataframe so rows with the same status appear together.
df_stps = df_stps.sort_values(by=['jobStatus'])
# BUG FIX: the original header listed only 3 labels ('jobStatus', 'STPS',
# 'Events') for 6 cell columns, and in an order that did not match the cells,
# so the rendered table was mislabeled. Labels now follow the cell order.
fig = go.Figure(data=[go.Table(
    header=dict(values=[
        'STPS', 'commitMessage', 'jobStage', 'jobName', 'jobStatus', 'Events'
    ]),
    columnwidth=[90, 90, 30, 40, 20, 10],
    cells=dict(
        values=[
            df_stps['token'].to_list(),
            df_stps['commitMessage'].to_list(),
            df_stps['jobStage'].to_list(),
            df_stps['jobName'].to_list(),
            df_stps['jobStatus'].to_list(),
            df_stps['count'].to_list(),
        ],
        align='left',
        font_size=14,
        height=30,
    ),
)])
fig.show()
# Persist the STPS table; `pathExperimentsFiles`, `csvName` and `today` are
# defined earlier in the notebook — not visible in this cell.
df_stps.to_csv(pathExperimentsFiles + '/dataAnalysis/' + csvName + today + '_STPS.csv', index=False)
# Top words per jobStatus (stopwords already removed upstream)
# ==============================================================================
jobStatusUnique = df_stps["jobStatus"].unique().tolist()
df_list = []
for status in jobStatusUnique:
    # BUG FIX: the original renamed 'jobStatus' -> status but never filtered
    # the rows, so every status column received identical counts. Filter to
    # the rows of this status before counting tokens.
    jobs_status = df_stps[df_stps["jobStatus"] == status]
    total = jobs_status.groupby(by="token")["jobStatus"].count().rename(status)
    df_list.append(total)
df = pd.concat(df_list, axis=1)
# BUG FIX: guard the sort — 'failed' is absent when no job ever failed,
# and the original raised KeyError in that case.
if "failed" in df.columns:
    df = df.sort_values(by=["failed"], ascending=True)
fig = px.bar(df, orientation='h', template=plotly_template, title="Number of fails by token")
fig.show()